Import avocado data.

# Read the raw avocado CSV, normalise the column names, and derive integer
# year / month / day columns from `date` (the original `date` column is kept).
avo_df <-
  read_csv("data/avocado.csv") %>%
  janitor::clean_names() %>%
  # Drop the first column by position.
  select(-1) %>%
  # Give the three code-named columns readable size labels.
  rename(small = x4046, large = x4225, extra_large = x4770) %>%
  separate(
    col = date,
    into = c("year", "month", "day"),
    remove = FALSE
  ) %>%
  # The separated pieces are converted to integers one by one.
  mutate(
    day = as.integer(day),
    month = as.integer(month),
    year = as.integer(year)
  )

# Reshape `avo_df` to long format in two passes: first the fruit-size columns
# (small through extra_large), then the bag columns (total_bags through
# x_large_bags). The pivots are applied one after the other, so each original
# row is repeated for every (fruit_size, bag_type) combination.
avo_tidy <-
  avo_df %>%
  pivot_longer(
    cols = small:extra_large,
    names_to = "fruit_size",
    values_to = "quantity_sold"
  ) %>%
  pivot_longer(
    cols = total_bags:x_large_bags,
    names_to = "bag_type",
    # Keep only the part of the column name in front of "_bags".
    names_pattern = "(.*)_bags",
    values_to = "bag_sold"
  ) %>%
  # Spell out the "x_large" label so it reads like the fruit-size labels.
  mutate(
    bag_type = if_else(bag_type == "x_large", "extra_large", bag_type)
  )

# Print the tidied table.
avo_tidy
## # A tibble: 218,988 x 12
##    date        year month   day average_price total_volume type  region
##    <date>     <int> <int> <int>         <dbl>        <dbl> <chr> <chr> 
##  1 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  2 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  3 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  4 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  5 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  6 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  7 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  8 2015-12-27  2015    12    27          1.33       64237. conv… Albany
##  9 2015-12-27  2015    12    27          1.33       64237. conv… Albany
## 10 2015-12-27  2015    12    27          1.33       64237. conv… Albany
## # … with 218,978 more rows, and 4 more variables: fruit_size <chr>,
## #   quantity_sold <dbl>, bag_type <chr>, bag_sold <dbl>

Description:
year: 2015-2018
month: 1-12
day: 1-31
type: conventional, organic
fruit_size: small, large, extra_large
bag_type: total, small, large, extra_large

# Read the GDP-by-state CSV and reshape the 2015-2017 columns into one row per
# area and year.
# NOTE(review): `year` comes from column names and therefore stays character,
# whereas `avo_df$year` is integer -- convert one of them before joining on
# year.
gdp_df <-
  read_csv("data/gdp-by-state.csv") %>%
  janitor::clean_names() %>%
  # Drop the first column by position, then the two years not used here.
  select(-1) %>%
  select(-x2013, -x2014) %>%
  pivot_longer(
    cols = x2015:x2017,
    names_to = "year",
    # clean_names() prefixed the year columns with "x"; strip it again.
    names_prefix = "x",
    values_to = "gdp"
  )

# Print the reshaped table.
gdp_df
## # A tibble: 180 x 3
##    area          year    gdp
##    <chr>         <chr> <dbl>
##  1 United States 2015  50301
##  2 United States 2016  50660
##  3 United States 2017  51337
##  4 Alabama       2015  36818
##  5 Alabama       2016  37158
##  6 Alabama       2017  37508
##  7 Alaska        2015  65971
##  8 Alaska        2016  63304
##  9 Alaska        2017  63610
## 10 Arizona       2015  38787
## # … with 170 more rows

Description:
year: 2015-2017

I think we could find some related material to discuss? https://www.medicalnewstoday.com/articles/270406#benefits https://pdf.usaid.gov/pdf_docs/PA00KP28.pdf

Maybe we could also pull in some more data here: fao.org/faostat/en/#search/Avocados

https://quickstats.nass.usda.gov/results/8A9760E3-BDB0-3A88-B014-DA81BA0845BD

Volume consumption by year: conventional vs. organic

# Stacked bar chart of total avocado volume per year, split by type.
# NOTE(review): the sum runs over every row of `avo_df`, i.e. over all regions;
# if `region` also contains aggregate rows (for example a national total),
# volumes would be counted more than once -- confirm against the data.
fig1 <-
  avo_df %>%
  group_by(year, type) %>%
  summarise(sum_volume = sum(total_volume)) %>%
  # summarise() only peels off the last grouping level; drop the rest too.
  ungroup() %>%
  ggplot(aes(x = year, y = sum_volume, fill = type)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col() +
  labs(
    title = "United States Avocado Consumption (2015-2018)",
    x = "Year",
    y = "Volume Consumption",
    # The plot maps `type` to fill, so the legend title belongs to `fill`
    # (a `color` label would never be used).
    fill = "Type"
  ) +
  scale_fill_viridis_d(direction = -1) +
  # The legend is hidden on this panel.
  theme(legend.position = "none")

# Pie chart of total avocado volume by type: a single stacked bar drawn in
# polar coordinates.
fig2 <-
  avo_df %>%
  group_by(type) %>%
  summarise(sum_volume = sum(total_volume)) %>%
  ggplot(aes(x = "", y = sum_volume, fill = type)) +
  # One full-width bar with white borders between the slices.
  geom_col(width = 1, color = "white") +
  # Wrapping the y axis around the circle turns the stacked bar into a pie.
  coord_polar(theta = "y", start = 0) +
  # Remove background, grid, and numeric labels.
  theme_void() +
  scale_fill_viridis_d(direction = -1) +
  labs(fill = "Type", caption = "https://hassavocadoboard.com/")

# Show both figures together. NOTE(review): combining two ggplot objects with
# `+` needs a package that defines it (presumably patchwork) -- confirm that
# it is loaded earlier in the document.
fig1 + fig2

Time vs. Avocado Consumption by Region

# Total volume per region and month, one facet per year, with a smoothed
# trend line for every region.
time_fig <-
  avo_df %>%
  group_by(year, month, region) %>%
  summarise(sum_volume = sum(total_volume)) %>%
  ggplot(aes(x = month, y = sum_volume, color = region)) +
  geom_point(size = 0.5) +
  # Thin trend lines without confidence bands.
  geom_smooth(size = 0.3, se = FALSE) +
  facet_grid(. ~ year) +
  # Label every third month on the x axis.
  scale_x_continuous(
    breaks = seq(1, 12, by = 3),
    labels = seq(1, 12, by = 3)
  ) +
  labs(
    title = "Time vs. Avocado Consumption by Region",
    x = "Month",
    y = "Volume Consumption",
    color = "Region",
    caption = "https://hassavocadoboard.com/"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(time_fig)

Time vs. Avocado Price by Region

# Mean of `average_price` per region and month, one facet per year, with a
# smoothed trend line for every region.
# NOTE(review): this assigns to `time_fig` again and so replaces the
# consumption plot stored under the same name earlier in the document.
time_fig <-
  avo_df %>%
  group_by(year, month, region) %>%
  summarise(mean_price = mean(average_price)) %>%
  ggplot(aes(x = month, y = mean_price, color = region)) +
  geom_point(size = 0.5) +
  # Thin trend lines without confidence bands.
  geom_smooth(size = 0.3, se = FALSE) +
  facet_grid(. ~ year) +
  # Label every third month on the x axis.
  scale_x_continuous(
    breaks = seq(1, 12, by = 3),
    labels = seq(1, 12, by = 3)
  ) +
  labs(
    title = "Time vs. Avocado Price by Region",
    x = "Month",
    y = "Average Price",
    color = "Region",
    caption = "https://hassavocadoboard.com/"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(time_fig)